import requests
import parsel
import csv
import time

# HTTP headers sent with every comment-page request: a desktop Chrome
# User-Agent plus a cookie string copied from a browser.
# NOTE(review): the cookie appears to be a captured logged-in session (it
# carries `dbcl2` / `ck` values) — treat it as a secret, consider loading it
# from the environment instead of committing it, and expect it to expire.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
    "cookie": 'll="118252"; bid=RP1pKdPBZxw; _pk_id.100001.4cf6=af45169916d06554.1761901829.; __yadk_uid=UjH5QYbn8k14N6g9HH2dXljRqWu3Ww11; _vwo_uuid_v2=D748601678751647988003FADD42FF160|4e738d89ddc907b652244bb9834017a8; dbcl2="292055264:QXIfeyyaulw"; push_noty_num=0; push_doumail_num=0; ck=aBj2; frodotk_db="59e4bca4333ab2a00dae35ec99b017e0"; __utmc=30149280; __utmv=30149280.29205; __utmc=223695111; __utmz=30149280.1762241735.3.3.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmz=223695111.1762241745.3.3.utmcsr=douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/; ap_v=0,6.0; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1762262993%2C%22https%3A%2F%2Fwww.douban.com%2F%22%5D; _pk_ses.100001.4cf6=1; __utma=30149280.1143164688.1761901829.1762256698.1762262993.6; __utmb=30149280.0.10.1762262993; __utma=223695111.997674525.1761901829.1762256713.1762262993.6; __utmb=223695111.0.10.1762262993'
}
# Accumulates one row per scraped comment (the "recommendation type" field is
# not collected).
all_comments = []

# Endpoint of the Douban comment list for this movie. The query string is
# supplied entirely through `params`, so the URL itself carries no `?status=P`;
# keeping it in both places made every request send `status` twice.
base_url = 'https://movie.douban.com/subject/36296619/comments'
# Query parameters for one page request. `start` is the comment offset and is
# overwritten for each page by the crawl loop; the other values stay fixed.
params = {
    'start': 0,
    'limit': 20,
    'sort': 'new_score',
    'status': 'P',
}

# Crawl the comment list page by page: offsets 0, 20, ..., 380 (20 pages in
# total). Each parsed comment is appended to `all_comments` as
# [vote_count, name, comm_time, short, rating].
for page in range(0, 400, 20):
    page_no = page // 20 + 1
    params['start'] = page  # offset of the first comment on this page

    try:
        # The timeout keeps a stalled connection from hanging the run forever.
        response = requests.get(base_url, headers=headers, params=params, timeout=10)
        # Treat HTTP error statuses as failures instead of parsing an error page.
        response.raise_for_status()
    except requests.RequestException as exc:
        # Stop crawling rather than crash, so the rows gathered so far are
        # still available to the code that runs after this loop.
        print(f"第{page_no}页请求失败，停止爬取：{exc}")
        break

    select = parsel.Selector(response.text)
    divs = select.css('#comments > div.comment-item')
    if not divs:
        # Nothing matched the comment selector — presumably we are past the
        # last page or a non-comment page was served; further requests would
        # only waste time.
        print(f"第{page_no}页没有评论，停止爬取")
        break

    for div in divs:
        vote_count = div.css('.votes.vote-count::text').get("0")  # number of "useful" votes
        name = div.css('.comment-info > a::text').get("")  # user name
        comm_time = div.css('.comment-time::text').get("").strip()  # comment time, whitespace trimmed
        short = div.css('.short::text').get("").strip()  # comment text, whitespace trimmed

        # The rating lives in a class attribute such as "allstar40 rating".
        # Pick the numeric suffix of the "allstarNN" token; a missing or
        # unexpected class string yields 0 instead of raising ValueError.
        rating_class = div.css('.rating::attr(class)').get("")
        rating = 0
        for token in rating_class.split():
            digits = token[len('allstar'):]
            if token.startswith('allstar') and digits.isdecimal():
                rating = int(digits)
                break

        all_comments.append([vote_count, name, comm_time, short, rating])

    print(f"已爬取第{page_no}页")
    time.sleep(5)  # pause between pages to avoid tripping anti-scraping limits

# Write everything collected in `all_comments` to a UTF-8 CSV file: one header
# row followed by one row per comment.
output_path = '《志愿军：浴血和平》评论.csv'
# Column titles: votes, user name, comment time, comment text, rating.
header_row = ['点赞数', '用户名', '评论时间', '评论内容', '评分']

# newline='' lets the csv module control line endings itself.
with open(output_path, 'w', newline='', encoding='utf-8') as csv_file:
    csv_out = csv.writer(csv_file)
    csv_out.writerow(header_row)
    for row in all_comments:
        csv_out.writerow(row)

print("爬取完成！已保存到" + output_path)